/*******************************************************************************
																				
	DESCRIPTION: 	This do file cleans the predictions coming out of R, obtains the ensemble
					model, performs the last calibration step and saves the final dataset.
					
					This do file does so separately for each year,
					i.e., it uses the predictions produced by
					103_caret_prediction_byYear.R
	
*******************************************************************************/

* Start from a clean session.
clear all

* Identifier of this do file, stored as a global.
global id_code "003"

* Run the do file that provides the cleaning program (cleanpred is called below).
* The global `code' must already hold the root path of the code folder.
do "${code}/Data Management/003_0_CleaningPredictions_program.do"

*******************************************************************************
** Baseline, 2006 **
*******************************************************************************

* Outcomes to clean for the 2006 baseline:
global outcomes emplAft3M_0M_In emplAft6M_0M_In emplAft6M_6M_In emplAft6M_12M_In emplAft12M_0M_In

* Models to clean and the years (a numlist) to loop over:
global models Full
global yearSpan 2006/2006

* Call cleanpred once per (model, year) combination.
foreach mdl of global models {
	foreach yr of numlist ${yearSpan} {
		display "Cleaning `mdl' `yr' Model"
		cleanpred, model(`mdl') year(`yr') outcomes($outcomes)
	}
}


*******************************************************************************
** Baseline, other years  **
*******************************************************************************

* Outcomes to clean for the non-2006 baseline years:
global outcomes emplAft6M_0M_In emplAft6M_6M_In emplAft6M_12M_In

* Models to clean and the years (a numlist) to loop over; 2006 is skipped here:
global models Full
global yearSpan 1992/2005 2007/2016

* Call cleanpred once per (model, year) combination.
foreach mdl of global models {
	foreach yr of numlist ${yearSpan} {
		display "Cleaning `mdl' `yr' Model"
		cleanpred, model(`mdl') year(`yr') outcomes($outcomes)
	}
}

*******************************************************************************
** Baseline, pooled samples  **
*******************************************************************************

* Outcomes to clean for the pooled samples:
global outcomes emplAft6M_0M_In emplAft6M_6M_In emplAft6M_12M_In

* Pooled model and its sample labels. The labels are plain tokens
* (e.g. 2006_2007), not a numlist, so they are looped over as a word list.
global models Full_Pooled
global yearSpan 2006_2007 2009_2010

* Call cleanpred once per (model, pooled sample) combination.
foreach mdl of global models {
	foreach yr of global yearSpan {
		display "Cleaning `mdl' `yr' Model"
		cleanpred, model(`mdl') year(`yr') outcomes($outcomes)
	}
}

*******************************************************************************
** Other models, 2006  **
*******************************************************************************

* For the Full_SeqDrop_incIndiv model, clean all three 6-month outcomes:
global outcomes emplAft6M_0M_In emplAft6M_6M_In emplAft6M_12M_In
global models Full_SeqDrop_incIndiv
global yearSpan 2006/2006

* Call cleanpred once per (model, year) combination.
foreach mdl of global models {
	foreach yr of numlist ${yearSpan} {
		display "Cleaning `mdl' `yr' Model"
		cleanpred, model(`mdl') year(`yr') outcomes($outcomes)
	}
}

* For the remaining models, clean only the emplAft6M_0M_In outcome
* (referred to as "JFR at 0M"):
global outcomes emplAft6M_0M_In

* List of models to clean. Every line of the list except the LAST one ends
* with the /// continuation marker. The last entry must not carry one:
* a trailing /// would splice the following line into this command, so the
* list would only terminate correctly if a blank line happened to follow it.
global models ///
			/// Marginal sub-models:
            Full_Marg_incIndiv Full_Marg_emplHist Full_Marg_incOther ///
            Full_Marg_incHist Full_Marg_migHist Full_Marg_indu ///
            Full_Marg_mun ///
            /// Sequential sub-models:
            Full_SeqDrop_mun Full_SeqDrop_indu Full_SeqDrop_incHist ///
            Full_SeqDrop_emplHist Full_SeqDrop_incOther ///
            Full_SeqDrop_migHist ///
			/// Models dropping past spell information from baseline:
			Full_DropPastSpells_DaysUnemp Full_DropPastSpells_unemplSpells ///
			Full_DropPastSpells_Both ///
			/// Extended models (full sample, missing dummies):
			EX_FullSample_UI EX_FullSample_WE EX_FullSample_OC ///
			EX_FullSample_RR EX_FullSample_IQ EX_FullSample_UM EX_FullSample_ALL ///
			EX_Ba_FullSample_UI EX_Ba_FullSample_WE EX_Ba_FullSample_OC ///
			EX_Ba_FullSample_RR EX_Ba_FullSample_IQ EX_Ba_FullSample_UM EX_Ba_FullSample_ALL ///
			/// Employment history models, variable groups:
            BasicWithEmplHist_Basic BasicWithEmplHist_Yminus1  BasicWithEmplHist_Yminus2 ///
            BasicWithEmplHist_Yminus3 BasicWithEmplHist_Yminus4 BasicWithEmplHist_Yminus5 BasicWithEmplHist_All ///
            BasicWithEmplHist_Marg_Yminus1 BasicWithEmplHist_Marg_Yminus2 BasicWithEmplHist_Marg_Yminus3 ///
            BasicWithEmplHist_Marg_Yminus4 BasicWithEmplHist_Marg_Yminus5 BasicWithEmplHist_Marg_PreUnemp ///
            /// Individual variables:
            BasicWithEmplHist_IndivVars_Seq_DaysUnemp_2Years BasicWithEmplHist_IndivVars_Seq_unemplSpells2Ybefore ///
            BasicWithEmplHist_IndivVars_Seq_nEmployers2Y BasicWithEmplHist_IndivVars_Seq_DaysOnDI_2Years  BasicWithEmplHist_IndivVars_Seq_tenure ///
            BasicWithEmplHist_IndivVars_Seq_L_nEmployees_L1L2 BasicWithEmplHist_IndivVars_Seq_L_firmSizeChange_L1L2 ///
            BasicWithEmplHist_IndivVars_Seq_L_layoffRate_L1L2 BasicWithEmplHist_IndivVars_Seq_missings ///
            BasicWithEmplHist_IndivVars_Seq ///
            BasicWithEmplHist_IndivVars_Marg_DaysUnemp_2Years BasicWithEmplHist_IndivVars_Marg_unemplSpells2Ybefore ///
            BasicWithEmplHist_IndivVars_Marg_nEmployers2Y BasicWithEmplHist_IndivVars_Marg_DaysOnDI_2Years  BasicWithEmplHist_IndivVars_Marg_tenure ///
            BasicWithEmplHist_IndivVars_Marg_L_nEmployees_L1L2 BasicWithEmplHist_IndivVars_Marg_L_firmSizeChange_L1L2 ///
            BasicWithEmplHist_IndivVars_Marg_L_layoffRate_L1L2 BasicWithEmplHist_IndivVars_Marg_missings

* All of these models are cleaned for 2006 only:
global yearSpan 2006/2006

* Call cleanpred once per (model, year) combination.
foreach model in $models  {
		
		foreach year of numlist $yearSpan {	
		
			di "Cleaning `model' `year' Model"
			cleanpred, model(`model') year(`year') outcomes(${outcomes})
					
		}
}

*******************************************************************************
** Tuning & other robustness checks, 2006  **
*******************************************************************************

* The tuning / robustness runs below use a single outcome:
global outcomes emplAft6M_0M_In

* Model without trainees (passes an if-condition through w_if):
cleanpred, model(Full_NoTraining) year(2006) outcomes($outcomes) w_if(training_combined_6months == 0)

* Alternative sample splits. Each call passes a different w_sample fraction,
* evaluated inline by the `= exp' macro expansion.
* NOTE(review): the meaning of w_sample is defined inside cleanpred, which is
* not visible here -- confirm that 2/7, 1/6 and 1/5 match the intended splits.

* 10-20-20 split:
cleanpred, model(Full) submodel(_ML_Robustness_1020Split) year(2006) outcomes($outcomes) w_sample(`= 2/7')

* 20-20-10 split:
cleanpred, model(Full) submodel(_ML_Robustness_2020Split) year(2006) outcomes($outcomes) w_sample(`= 1/6')

* 10-40-10 split:
cleanpred, model(Full) submodel(_ML_Robustness_1040Split) year(2006) outcomes($outcomes) w_sample(`= 1/5')

* Full model cleaned with the spline option; surname() tags this variant:
cleanpred, model(Full) year(2006) outcomes($outcomes) spline(`= 1/5') surname(_withSpline)

* Full model cleaned with the positive option; surname() tags this variant:
cleanpred, model(Full) year(2006) outcomes($outcomes) positive surname(_PositiveWeights)

* Model without recalls: switch back to all three 6-month outcomes first.
global outcomes emplAft6M_0M_In emplAft6M_6M_In emplAft6M_12M_In

cleanpred, model(Full_NoRecalls) year(2006) outcomes($outcomes) w_if(recalled == 0)